RMSEs from Time Series CV on Training Data

AGU manuscript

Author

Katherine Goode

Published

September 23, 2025

Set Up

Load packages:

library(dplyr)
library(forcats)
library(ggplot2)
library(stringr)
library(tidyr)
library(wesanderson)

Specify file path:

# Root folder for the EESN code and results; later paths are built from it with
# paste0(), so the trailing slash is required.
# NOTE(review): "~/../../Volumes" presumably resolves to "/Volumes" only when
# the home directory sits two levels below the filesystem root - confirm.
fp <- "~/../../Volumes/subseasonal_extreme/agu-manuscript-code/EESNs/"

RMSEs

Data Steps

Load and clean RMSEs:

# Read the cross-validation RMSEs and recover the tuning parameters that are
# encoded in each result file name.
cv_rmses <-
  readr::read_csv(
    file = paste0(fp, "01-tuning-time-series-cv/results/cv-rmses.csv"),
    show_col_types = FALSE
  ) |>
  # Strip only a literal, trailing ".csv". An unescaped "." is a regex wildcard
  # and would match any character followed by "csv" anywhere in the name.
  mutate(file = str_remove(file, "\\.csv$")) |>
  # separate() splits on runs of non-alphanumeric characters by default;
  # remove = FALSE keeps the original `file` column alongside the new pieces.
  separate(
    col = file,
    into = c("target_region", "tau", "m", "nh", "nu"),
    remove = FALSE
  ) |>
  # Drop the label prefix from each piece and convert the parameters to numeric.
  mutate(
    target_region = str_remove(target_region, "targetregion"),
    tau = as.numeric(str_remove(tau, "tau")),
    m = as.numeric(str_remove(m, "m")),
    nh = as.numeric(str_remove(nh, "nh")),
    # The value parsed from the file name is divided by 100 to give nu.
    nu = as.numeric(str_remove(nu, "nu")) / 100
  )

Overall Best

Determine which parameters resulted in the lowest RMSEs:

# Keep, for every target region and tau, the row(s) whose RMSE equals the
# group minimum (missing RMSEs are ignored when taking the minimum).
best_rmses <-
  cv_rmses |>
  group_by(target_region, tau) |>
  filter(rmse == min(rmse, na.rm = TRUE)) |>
  ungroup()

Print the best RMSEs:

# Show the best RMSEs as a table: the file column is left out, both RMSE
# columns are rounded to two decimals, and rows are sorted by region and tau.
best_rmses |>
  select(-file) |>
  mutate(across(c(rmse, rmse_extreme), \(x) round(x, 2))) |>
  arrange(target_region, tau) |>
  knitr::kable()
target_region tau m nh nu rmse rmse_extreme
MW 1 0 50 0.35 2.51 3.49
MW 2 1 50 0.10 2.93 4.38
MW 3 2 50 0.85 2.98 4.48
MW 4 4 50 0.85 3.00 4.55
NE 1 0 50 0.35 2.36 3.36
NE 2 1 50 0.10 2.68 4.07
NE 3 2 50 0.85 2.71 4.17
NE 4 4 50 0.85 2.74 4.21
SE 1 0 50 0.10 2.09 3.07
SE 2 1 50 0.85 2.36 3.63
SE 3 2 50 0.85 2.38 3.69
SE 4 4 50 0.85 2.40 3.73
SW 1 0 50 0.10 1.94 2.89
SW 2 0 50 0.85 2.06 3.16
SW 3 3 50 0.85 2.06 3.20
SW 4 4 50 0.85 2.06 3.22
W 1 0 50 0.10 2.32 3.37
W 2 4 50 0.60 2.44 3.70
W 3 4 50 0.85 2.44 3.71
W 4 4 50 0.85 2.44 3.72

Save the best RMSEs:

# Write the best-RMSE rows to a CSV in the CV results folder, without row names.
write.csv(
  x = best_rmses,
  file = paste0(fp, "01-tuning-time-series-cv/results/cv-rmses-best.csv"),
  row.names = FALSE
)

Plot the best RMSEs:

Plot the corresponding RMSEs on extremes:

Best By Tuning Parameter

Determine lowest RMSEs for each value of nu, input vars, forecast horizon, and target region:

# For every target region, tau, and nu, keep the row(s) with the lowest RMSE,
# drop the extremes-only RMSE column, and flag the kept rows.
best_rmses_by_param <-
  cv_rmses |>
  filter(
    rmse == min(rmse, na.rm = TRUE),
    .by = c(target_region, tau, nu)
  ) |>
  select(-rmse_extreme) |>
  mutate(best_rmse = TRUE)
  
# Same selection as above, but based on the extremes-only RMSE: keep the
# row(s) with the lowest rmse_extreme per target region, tau, and nu, drop the
# overall RMSE column, and flag the kept rows.
best_rmses_extremes_by_param <-
  cv_rmses |>
  filter(
    rmse_extreme == min(rmse_extreme, na.rm = TRUE),
    .by = c(target_region, tau, nu)
  ) |>
  select(-rmse) |>
  mutate(best_rmse = TRUE)

Plot the best RMSEs for each set of model iterations:

Plot the best RMSEs for each set of model iterations (considering extremes only):

Relationship to Tuning Params

Plot of RMSEs (training data):

Plot of RMSEs for extreme observations only (training data):